Houston Hobby Airport data

For this month’s data jam meetup, we worked with one day of data (Monday) from the Houston Hobby airport. The data is sourced from FlightAware’s API.

The data is available as CSV files, each in its respective directory.

# Build the raw GitHub URL of one of the data-jam CSV files.
#
# name: base name of the dataset, without the ".csv" extension.
# Returns the full URL as a character string.
make_csv_url <- function(name) {
  base_url <- 'https://raw.githubusercontent.com/houstondatavis/data-jam-february-2017/data-pipeline/'
  paste0(base_url, name, '.csv')
}

# Download the four data-jam tables straight from GitHub into data frames.
# Each call builds the file URL with make_csv_url() and parses it as CSV.
flights <- read.csv(file = make_csv_url("flights"))
routes <- read.csv(file = make_csv_url("routes"))
tracks <- read.csv(file = make_csv_url("tracks"))
weather <- read.csv(file = make_csv_url("weather"))

Checking the unique flights$ident values in the flights data

# Size of the flights table (rows, columns).
dim(flights)
## [1] 201  12
# Distinct flight idents present in the flights table.
flight_numbers <- unique(flights$ident)

Extract the flight ident from the routes flight_id

# Derive the flight ident from flight_id. A flight_id looks like
# "SWA4569-1487312847-airline-0540" (see the head() output below), so the
# ident is everything before the first hyphen. Cutting at the hyphen instead
# of taking a fixed 7 characters keeps idents of any length intact
# (e.g. "SWA345" would otherwise come out as "SWA345-").
# NOTE(review): assumes the ident itself never contains a hyphen - confirm.
routes$flight_no <- sub("-.*$", "", routes$flight_id)
head(routes)
##    name             type latitude  longitude
## 1  KMAF   Origin Airport 31.94253 -102.20192
## 2   SJT VOR-TAC (NAVAID) 31.37500 -100.45500
## 3   LLO VOR-TAC (NAVAID) 30.79633  -98.78739
## 4 CHUKY         Waypoint 30.69653  -98.02244
## 5 OWEEE  Reporting Point 30.56417  -97.06481
## 6 PHEBE         Waypoint 30.49147  -96.79300
##                         flight_id order flight_no
## 1 SWA4569-1487312847-airline-0540     0   SWA4569
## 2 SWA4569-1487312847-airline-0540     1   SWA4569
## 3 SWA4569-1487312847-airline-0540     2   SWA4569
## 4 SWA4569-1487312847-airline-0540     3   SWA4569
## 5 SWA4569-1487312847-airline-0540     4   SWA4569
## 6 SWA4569-1487312847-airline-0540     5   SWA4569

Extract flight ident no from the tracks flight_id

# Derive the flight ident from flight_id. A flight_id looks like
# "SWA4569-1487312847-airline-0540" (see the head() output below), so the
# ident is everything before the first hyphen. Cutting at the hyphen instead
# of taking a fixed 7 characters keeps idents of any length intact
# (e.g. "SWA345" would otherwise come out as "SWA345-").
# NOTE(review): assumes the ident itself never contains a hyphen - confirm.
tracks$flight_no <- sub("-.*$", "", tracks$flight_id)
head(tracks)
##    timestamp latitude longitude groundspeed altitude altitudeStatus
## 1 1487556699 31.95944 -102.2478         147       50              -
## 2 1487556759 31.99778 -102.2831         230       66              -
## 3 1487556820 32.01278 -102.2014         318       91              -
## 4 1487556847 32.00056 -102.1667         284      103              -
## 5 1487556908 31.96417 -102.0717         314      125              -
## 6 1487556970 31.92778 -101.9681         336      160              -
##   updateType altitudeChange                       flight_id flight_no
## 1          Z              C SWA4569-1487312847-airline-0540   SWA4569
## 2          Z              C SWA4569-1487312847-airline-0540   SWA4569
## 3          Z              C SWA4569-1487312847-airline-0540   SWA4569
## 4          Z              C SWA4569-1487312847-airline-0540   SWA4569
## 5          Z              C SWA4569-1487312847-airline-0540   SWA4569
## 6          Z              C SWA4569-1487312847-airline-0540   SWA4569
library(leaflet)
library(maps)
# US state outlines from the maps package, returned as data (plot = FALSE)
# with closed polygons (fill = TRUE) so leaflet can draw them.
map_usa <- map("state", fill = TRUE, plot = FALSE)

# Base leaflet widget reused by every map below: default tiles with the
# state polygons filled from a 10-colour topo palette and no outline.
m <- leaflet(map_usa) %>%
  addTiles() %>%
  addPolygons(
    fillColor = topo.colors(10, alpha = NULL),
    stroke = FALSE
  )

m

Plotting the route plan of flights into Houston

# Overlay every planned route on the base map, centre the view on the
# Houston coordinates (lng, lat) and drop a marker there.
# The stray empty argument that sat between `color` and `weight` has been
# removed.
# NOTE(review): all routes are passed as one sequence of points, so
# consecutive flights are presumably joined by a connecting segment -
# confirm, and consider splitting by flight_id if that is not wanted.
m %>%
  addPolylines(
    data = routes,
    lat = ~latitude,
    lng = ~longitude,
    color = "red",
    weight = 1
  ) %>%
  setView(-95.27889, 29.64542, zoom = 6) %>%
  addMarkers(-95.27889, 29.64542, popup = "Destination Houston")

Plotting the Tracks (Flight-specific location tracking data) for all flights into Houston

# Overlay every recorded track on the base map, centre the view on the
# Houston coordinates (lng, lat) and drop a marker there.
# The stray empty argument that sat between `color` and `weight` has been
# removed.
# NOTE(review): all tracks are passed as one sequence of points, so
# consecutive flights are presumably joined by a connecting segment -
# confirm, and consider splitting by flight_id if that is not wanted.
m %>%
  addPolylines(
    data = tracks,
    lat = ~latitude,
    lng = ~longitude,
    color = "red",
    weight = 1
  ) %>%
  setView(-95.27889, 29.64542, zoom = 6) %>%
  addMarkers(-95.27889, 29.64542, popup = "Destination Houston")

We can see that the flight tracks are quite different from their planned routes. Let's look deeper into a few individual flights.

Plots for flight SWA4569

# Planned route and recorded track for flight SWA4569.
route_SWA4569 <- subset(routes, flight_no == "SWA4569")
track_SWA4569 <- subset(tracks, flight_no == "SWA4569")

# Planned route in green, flown track in blue, view centred on Houston.
m %>%
  addPolylines(
    data = route_SWA4569,
    lat = ~latitude,
    lng = ~longitude,
    color = "green",
    weight = 4
  ) %>%
  addPolylines(
    data = track_SWA4569,
    lat = ~latitude,
    lng = ~longitude,
    color = "blue",
    weight = 4
  ) %>%
  setView(-95.27889, 29.64542, zoom = 6) %>%
  addMarkers(-95.27889, 29.64542, popup = "Destination Houston")

Plots for flight SWA3020

# Planned route and recorded track for flight SWA3020.
route_SWA3020 <- subset(routes, flight_no == "SWA3020")
track_SWA3020 <- subset(tracks, flight_no == "SWA3020")

# The origin airport is the waypoint with order 0. Look it up directly:
# the previous loop ran over 1:length(route_SWA3020), which for a data frame
# is the number of columns, not rows, so it only inspected the first few
# rows and would fail on a route shorter than that.
origin_row <- which(route_SWA3020$order == 0)[1]
# Fail loudly if the route has no order-0 waypoint.
stopifnot(!is.na(origin_row))
origin_lat <- route_SWA3020$latitude[origin_row]
origin_long <- route_SWA3020$longitude[origin_row]
origin_lat
## [1] 36.12447
origin_long
## [1] -86.67817
# Planned route in green, flown track in blue, view centred on Houston.
# The origin marker uses the origin_long / origin_lat values computed from
# the route's order-0 waypoint instead of repeating them as literals.
m %>%
  addPolylines(
    data = route_SWA3020,
    lat = ~latitude,
    lng = ~longitude,
    color = "green",
    weight = 4
  ) %>%
  addPolylines(
    data = track_SWA3020,
    lat = ~latitude,
    lng = ~longitude,
    color = "blue",
    weight = 4
  ) %>%
  setView(-95.27889, 29.64542, zoom = 8) %>%
  addMarkers(origin_long, origin_lat, popup = "Origin Airport") %>%
  addMarkers(-95.27889, 29.64542, popup = "Destination Houston")

Let's look at flight SWA1775

# Planned route and recorded track for flight SWA1775.
route_SWA1775 <- subset(routes, flight_no == "SWA1775")
track_SWA1775 <- subset(tracks, flight_no == "SWA1775")

# The origin airport is the waypoint with order 0. Look it up directly:
# the previous loop ran over 1:length(route_SWA1775), which for a data frame
# is the number of columns, not rows, so it only inspected the first few
# rows and would fail on a route shorter than that.
origin_row <- which(route_SWA1775$order == 0)[1]
# Fail loudly if the route has no order-0 waypoint.
stopifnot(!is.na(origin_row))
origin_lat <- route_SWA1775$latitude[origin_row]
origin_long <- route_SWA1775$longitude[origin_row]
origin_lat
## [1] 41.78597
origin_long
## [1] -87.75242
# Planned route in green, flown track in blue, view centred on Houston.
# The origin marker uses the origin_long / origin_lat values computed from
# the route's order-0 waypoint instead of repeating them as literals.
m %>%
  addPolylines(
    data = route_SWA1775,
    lat = ~latitude,
    lng = ~longitude,
    color = "green",
    weight = 4
  ) %>%
  addPolylines(
    data = track_SWA1775,
    lat = ~latitude,
    lng = ~longitude,
    color = "blue",
    weight = 4
  ) %>%
  setView(-95.27889, 29.64542, zoom = 8) %>%
  addMarkers(origin_long, origin_lat, popup = "Origin Airport") %>%
  addMarkers(-95.27889, 29.64542, popup = "Destination Houston")

Convert datetime from UTC

# Convert Unix epoch seconds into POSIXct date-times.
#
# unix_time: numeric vector of seconds counted from the 1970-01-01 origin.
# Returns a POSIXct vector of the same length. No time zone is passed to
# as.POSIXct(), so the session's default applies when the values are printed.
#
# The value is now returned visibly as the last expression; previously it was
# only the invisible result of an assignment, next to a dead `return(time)`
# comment that named a non-existent variable.
convertdatetime <- function(unix_time) {
  as.POSIXct(unix_time, origin = "1970-01-01")
}

# Format date-times as "HH:MM:SS" time-of-day strings.
#
# date1: a date-time vector accepted by strftime() (e.g. POSIXct).
# Returns a character vector of the same length.
#
# The value is now returned visibly as the last expression; previously it was
# only the invisible result of an assignment.
strip_time <- function(date1) {
  strftime(date1, format = '%H:%M:%S')
}

# Add a date-time column (from the epoch `timestamp`) and an "HH:MM:SS"
# time-of-day column to the tracks table, then open it in the data viewer.
tracks[["new_date"]] <- convertdatetime(tracks[["timestamp"]])
tracks[["new_time"]] <- strip_time(tracks[["new_date"]])
View(tracks)

# Same two derived columns for the weather table, based on its `time` column.
weather[["new_date"]] <- convertdatetime(weather[["time"]])
weather[["new_time"]] <- strip_time(weather[["new_date"]])
View(weather)

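Let's plot the altitude of flight SWA4569 along its track (longitude and latitude) as a 3D trajectory, followed by the same plot for flights SWA3020 and SWA1775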

library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# 3D trajectory of flight SWA4569: longitude and latitude on the horizontal
# axes, altitude on the vertical axis, drawn as a single red line.
# The line option is spelled `reversescale`; it was misspelled `reverscale`.
flight_traj1 <- plot_ly(
  data = track_SWA4569,
  x = ~longitude,
  y = ~latitude,
  z = ~altitude,
  type = "scatter3d",
  mode = "lines",
  opacity = 1,
  line = list(width = 2, color = "red", reversescale = FALSE)
)

flight_traj1
## Warning in arrange_impl(.data, dots): '.Random.seed' is not an integer
## vector but of type 'NULL', so ignored
# 3D trajectory of flight SWA3020: longitude and latitude on the horizontal
# axes, altitude on the vertical axis, drawn as a single red line.
# The line option is spelled `reversescale`; it was misspelled `reverscale`.
flight_traj2 <- plot_ly(
  data = track_SWA3020,
  x = ~longitude,
  y = ~latitude,
  z = ~altitude,
  type = "scatter3d",
  mode = "lines",
  opacity = 1,
  line = list(width = 2, color = "red", reversescale = FALSE)
)

flight_traj2
# 3D trajectory of flight SWA1775: longitude and latitude on the horizontal
# axes, altitude on the vertical axis, drawn as a single red line.
# The line option is spelled `reversescale`; it was misspelled `reverscale`.
flight_traj3 <- plot_ly(
  data = track_SWA1775,
  x = ~longitude,
  y = ~latitude,
  z = ~altitude,
  type = "scatter3d",
  mode = "lines",
  opacity = 1,
  line = list(width = 2, color = "red", reversescale = FALSE)
)

flight_traj3

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.